Target Audience
- Potential and existing hosts
Purpose
- Use our analysis to find out how hosts can stay competitive during this period (political unrest, COVID-19) across different room types
# Keep the raw price string in its own column before it is parsed.
listings$price_str <- listings$price
# Strip "$" and thousands separators, then convert the price to numeric.
listings$price <- as.numeric(gsub("[$,]", "", listings$price))
# Keep only listings priced at or below HKD 5000.
# A bare `!(price > 5000)` mask is NA wherever price is NA, and indexing a data
# frame with NA inserts all-NA phantom rows. Test for NA explicitly so listings
# without a parseable price are dropped instead of corrupting the table.
listings <- listings[!is.na(listings$price) & listings$price <= 5000, ]
## [1] "Apartment" "Guest suite" "Condominium"
## [4] "Bed and breakfast" "Hostel" "Hotel"
## [7] "House" "Campsite" "Aparthotel"
## [10] "Hut" "Loft" "Guesthouse"
## [13] "Serviced apartment" "Boutique hotel" "Townhouse"
## [16] "Other" "Bungalow" "Tiny house"
## [19] "Cave" "Boat" "Pension (South Korea)"
## [22] "Igloo" "Chalet" "Cottage"
## [25] "Treehouse" "Nature lodge" "Tent"
## [28] "Cabin" "Villa" "Vacation home"
## [31] "Minsu (Taiwan)" "Island" "Earth house"
## [34] "Dorm" "Houseboat" "Farm stay"
## [37] "Castle" "Casa particular (Cuba)" "Kezhan (China)"
## [40] "Resort"
# Choropleth of Hong Kong districts coloured by the average location review
# score of the listings in each neighbourhood.
library(sp)

# Mean location score per neighbourhood (listings without a score are ignored).
Reviews <- listings %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_loc_review = mean(review_scores_location, na.rm = TRUE))
colnames(Reviews) <- c("Eng_name", "value")
# Align the listing spelling with the name used in the map data. Coerce to
# character first so the replacement also works if the column is a factor.
Reviews$Eng_name <- as.character(Reviews$Eng_name)
Reviews$Eng_name[Reviews$Eng_name %in% "Central & Western"] <- "Central and Western"

hkmap <- readRDS("HKG_adm1_old.rds") # geo data of HK map

# Preprocessing: attach the per-district value to every polygon vertex.
map_data <- data.frame(id = hkmap$ID_1, Eng_name = hkmap$NAME_1)
map_data <- merge(map_data, Reviews, by = "Eng_name")
hkmapdf <- fortify(hkmap)
map_data <- merge(hkmapdf, map_data, by = "id")
# merge() does not guarantee the original row order; polygons and paths are
# drawn in row order, so restore the vertex sequence within each group.
map_data <- map_data[order(map_data$group, map_data$order), ]

map_bg <- ggplot(map_data, aes(long, lat, group = group, fill = value)) +
  geom_polygon() + # district shapes
  geom_path(colour = "#FFFFFF") + # white district borders
  # na.rm keeps the limits finite even if a district has no usable mean.
  scale_fill_gradient(
    limits = range(map_data$value, na.rm = TRUE),
    low = "#9999FF", high = "#330066"
  ) +
  guides(fill = guide_legend(title = "Rating", title.position = "top")) +
  ggtitle("Map showing Average Location Score by Area") +
  # Strip axes and background: coordinates carry no meaning for the reader.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(), axis.title.x = element_blank(),
    axis.text.y = element_blank(), axis.title.y = element_blank(),
    axis.ticks = element_blank(), panel.background = element_blank()
  )
map_bg
# Choropleth of Hong Kong districts coloured by the average listing price in
# each neighbourhood.
library(sp)

# Mean price per neighbourhood (listings without a price are ignored).
Prices <- listings %>%
  group_by(neighbourhood_cleansed) %>%
  summarise(avg_price = mean(price, na.rm = TRUE))
colnames(Prices) <- c("Eng_name", "value")
# Align the listing spelling with the name used in the map data. Coerce to
# character first so the replacement also works if the column is a factor.
Prices$Eng_name <- as.character(Prices$Eng_name)
Prices$Eng_name[Prices$Eng_name %in% "Central & Western"] <- "Central and Western"

hkmap2 <- readRDS("HKG_adm1_old.rds") # geo data of HK map

# Preprocessing: attach the per-district value to every polygon vertex.
map_data2 <- data.frame(id = hkmap2$ID_1, Eng_name = hkmap2$NAME_1)
map_data2 <- merge(map_data2, Prices, by = "Eng_name")
hkmapdf2 <- fortify(hkmap2)
map_data2 <- merge(hkmapdf2, map_data2, by = "id")
# merge() does not guarantee the original row order; polygons and paths are
# drawn in row order, so restore the vertex sequence within each group.
map_data2 <- map_data2[order(map_data2$group, map_data2$order), ]

map_bg <- ggplot(map_data2, aes(long, lat, group = group, fill = value)) +
  geom_polygon() + # district shapes
  geom_path(colour = "#FFFFFF") + # white district borders
  # na.rm keeps the limits finite even if a district has no usable mean.
  scale_fill_gradient(
    limits = range(map_data2$value, na.rm = TRUE),
    low = "#FF9999", high = "#CC0000"
  ) +
  # The fill encodes average price here, so the legend must not say "Rating".
  guides(fill = guide_legend(title = "Price (HKD)", title.position = "top")) +
  ggtitle("Map showing Average Price by Area") +
  # Strip axes and background: coordinates carry no meaning for the reader.
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(), axis.title.x = element_blank(),
    axis.text.y = element_blank(), axis.title.y = element_blank(),
    axis.ticks = element_blank(), panel.background = element_blank()
  )
map_bg
### Price per person scatter plots
# Scatter plot of review rating against price per guest, coloured by
# neighbourhood.
Pricesperperson <- listings %>%
  select(accommodates, price, review_scores_rating, neighbourhood_cleansed) %>%
  mutate(price_per_pax = price / accommodates) %>%
  # Keep per-guest prices within [20, 10000]. The comparison must be
  # element-wise: scalar `||` only looks at the first row (and is an error on
  # vectors in R >= 4.3). filter() also drops rows where the value is NA
  # instead of turning them into all-NA rows.
  filter(price_per_pax >= 20, price_per_pax <= 10000)

ggplot(
  data = Pricesperperson,
  mapping = aes(
    x = price_per_pax,
    y = review_scores_rating,
    color = neighbourhood_cleansed
  )
) +
  geom_point()
## Warning: Removed 4291 rows containing missing values (geom_point).
# Stacked bar chart: share of each main property type within every
# neighbourhood.

# Property types included in the comparison.
main_types <- c("Apartment", "House", "Condominium", "Townhouse", "Loft")

# Listing count per neighbourhood and property type (main types only).
propertydf <- listings %>%
  filter(property_type %in% main_types) %>%
  group_by(neighbourhood_cleansed, property_type) %>%
  summarize(Freq = n()) %>%
  ungroup()

# Total count of main-type listings per neighbourhood (the denominator).
totalproperty <- listings %>%
  filter(property_type %in% main_types) %>%
  group_by(neighbourhood_cleansed) %>%
  summarize(sum = n())

# Share of each property type within its neighbourhood.
propertyratio <- merge(propertydf, totalproperty, by = "neighbourhood_cleansed")
propertyratio <- propertyratio %>% mutate(ratio = Freq / sum)

ggplot(
  propertyratio,
  aes(x = neighbourhood_cleansed, y = ratio, fill = property_type)
) +
  geom_bar(position = "stack", stat = "identity") +
  # One fill scale only: a second fill scale would just replace the first, and
  # a colour gradient has nothing to act on without a colour aesthetic.
  scale_fill_manual(
    "Property Type",
    values = c("#FF3300", "#F0E442", "#009E73", "#56B4E9", "#666666")
  ) +
  scale_y_continuous(labels = scales::percent) +
  xlab("Neighborhood") +
  ylab("Percentage") +
  # The chart shows shares in a bar chart, so the subtitle says so.
  ggtitle(
    "Which types of Listings are there in Hong Kong?",
    subtitle = "Share of Listing Type by Neighborhood"
  ) +
  theme(
    plot.title = element_text(face = "bold"),
    plot.subtitle = element_text(face = "bold", color = "grey35"),
    plot.caption = element_text(size = 10, color = "grey68"),
    axis.text.x = element_text(size = 8)
  ) +
  coord_flip()
### Demand Price Analysis